IRIX 6.5 Applications 2001 May

home *** CD-ROM | disk | FTP | other *** search

/ IRIX 6.5 Applications 2001 May / SGI IRIX 6.5 Applications 2001 May.iso / dev / insight_dev.idb / usr / lib / Insight / dweb / dtl2html / splithtml.z / splithtml

Wrap

Text File | 2001-04-05 | 15.0 KB | 611 lines

#!/usr/bin/perl
#
# splithtml {-c cfg_file} {-l lvl_id} {-f fname_spec} {-o output_dir}
#           {-v} {-noclean} input_f
#
# Based on a given level identifier (1,2,3...), matching what is
# found in the TOC data file (default.dat), this program splits
# up the full HTML file into separate HTML files.  It also handles the
# updating of any and all references (including those in the TOC
# data files).
#
# Processing order:
#   1. load splithtml.cfg (plus any file named with -c)
#   2. readToc()            - decide where to break and what each id maps to
#   3. createContentFiles() - write one HTML file per break, fixing hrefs
#   4. createHelpTopics()   - write help.html when help ids are present
#   5. createTocFiles()     - write index.html, toc_full.html and friends
#   6. unless -noclean, remove the input file and the *.dat files
#
# requires perl 5.6 or later
#

use 5.006;
use strict;
use warnings;

# Settings supplied by splithtml.cfg and by any file given with -c.
our ($DEF_FNAME_SPEC, $DEF_CHUNK_ID, $DEF_EXP_LEVEL);
our ($STD_HDR, $STD_FTR, $STD_TOC_HDR, $STD_TOC_FTR);
our (%TOC_TITLES);

# Program state.  These stay package variables (not lexicals) so that a
# configuration file loaded with -c can still see or set them, exactly as
# it could before strictures were enabled.
our ($_fname_spec, $_lvl_id, $_bVerbose, $_bClean, $_bHelpExist);
our ($_indxExist, $_output_dir, $_input_file, $_input_dir);
our ($_ttl, $_curr_title, $_last_error);
our (%_element_map, @_break_elements, @_break_titles);

$| = 1;

# pull in the default configuration file; all local referenced
# config files override whatever is in here
#
{
    my $default_cfg = 'splithtml.cfg';
    if ($0 =~ m{/}) {
        # The file is expected to sit next to the script, whatever the
        # script itself happens to be called.
        ($default_cfg = $0) =~ s{[^/]*$}{splithtml.cfg};
    }
    load_config($default_cfg);
}

$_fname_spec    = '';
$_lvl_id        = -1;
$_bVerbose      = 0;
$_bClean        = 1;
$_bHelpExist    = 0;
$_indxExist     = '';
$_output_dir    = '';
$_input_file    = '';
$_input_dir     = '';
$_ttl           = '';
$_curr_title    = '';
$_last_error    = '';
%_element_map   = ();
@_break_elements = ();
@_break_titles   = ();

# read in cmd-line arguments
#
while (@ARGV) {
    my $opt = $ARGV[0];
    if    ($opt eq '-c')       { load_config(option_value()); }
    elsif ($opt eq '-l')       { $_lvl_id     = to_number(option_value()); }
    elsif ($opt eq '-f')       { $_fname_spec = option_value(); }
    elsif ($opt eq '-o')       { $_output_dir = option_value(); }
    elsif ($opt eq '-v')       { $_bVerbose = 1; shift @ARGV; }
    elsif ($opt eq '-noclean') { $_bClean   = 0; shift @ARGV; }
    else                       { last; }
}

# the input file is always the last argument left over
$_input_file = @ARGV ? $ARGV[-1] : '';

# see if any problems with arguments, as specified; also set up defaults
#
if ($_input_file eq '') {
    usage();
    exit(0);
}

{
    my $slash = rindex($_input_file, '/');
    if    ($slash == -1) { $_input_dir = '.'; }
    elsif ($slash == 0)  { $_input_dir = '/'; }    # file directly under /
    else                 { $_input_dir = substr($_input_file, 0, $slash); }
}

if ($_output_dir eq '') {
    $_output_dir = $_input_dir;
}
if (!(-w $_output_dir)) {
    fail("Cannot write to $_output_dir");
}

if ($_fname_spec eq '') {
    # Fall back to the spec the usage text documents when the
    # configuration does not define one.
    $_fname_spec = (defined $DEF_FNAME_SPEC && $DEF_FNAME_SPEC ne '')
                 ? $DEF_FNAME_SPEC
                 : 'sgi%05d.html';
}
$_fname_spec =~ s{^/}{};

if ($_lvl_id < 0) {
    $_lvl_id = to_number($DEF_CHUNK_ID);
}
if ($_lvl_id <= 0 || $_lvl_id > 5) {
    $_lvl_id = 1;
}

if ($_bVerbose == 1) {
    print "\n\nsplithtml: will read from $_input_dir\n\n";
    print "splithtml: processing $_input_dir/default.dat\n\n";
}

# read in table of contents data file, configure what the filename(s) will be
#
if (!readToc("$_input_dir/default.dat")) {
    fail("Cannot read from $_input_dir/default.dat");
}

if ($_bVerbose == 1) {
    print "splithtml: ", scalar(@_break_elements), " breaking elements found\n\n";
    print "splithtml: processing $_input_file\n\n";
}

# read the master html file and break it up, fixing href's along the way
#
if (!createContentFiles($_input_file)) {
    fail($_last_error ne '' ? $_last_error : "Cannot read from $_input_file");
}

# build the toc structures/files
#
if ($_bVerbose == 1) {
    print "splithtml: processing TOC file(s)\n\n";
}
createHelpTopics();
if (!createTocFiles()) {
    fail('Cannot create TOC file(s)'
         . ($_last_error ne '' ? ": $_last_error" : ''));
}

# clean up
#
if ($_bClean == 1) {
    if ($_bVerbose == 1) {
        print "splithtml: cleaning up...\n\n";
    }

    # Removal is done in-process rather than through "/bin/rm -f" so that
    # directory or file names containing blanks or shell metacharacters
    # are handled literally.
    my @doomed = ();
    if (opendir(my $dh, $_input_dir)) {
        # same set a shell "*.dat" would match: no dot files
        @doomed = map  { $_input_dir . '/' . $_ }
                  sort
                  grep { !/^\./ && /\.dat$/ }
                  readdir($dh);
        closedir($dh);
    }
    push @doomed, $_input_file;

    foreach my $path (@doomed) {
        next unless -e $path || -l $path;    # already gone is not an error
        unlink($path) or print "ERROR: ($!) removing '$path'\n";
    }
}

exit(0);

#
# void usage()
#
# Prints the command-line synopsis to standard output.
#
sub usage {
    print "\n\n",
          "usage: splithtml {-c cfg_file} {-l lvl_id} {-f fname_spec}\n",
          " {-o output_dir} {-v} {-noclean} in_html_file\n\n",
          "-c cfg_file : is the configuration file to use. See the default file:\n",
          " /usr/lib/Insight/dweb/dtl2html/splithtml.cfg for info.\n\n",
          "-l lvl_id : is the level to break/chunk on (based on TOC)\n",
          " default used is \"1\"\n\n",
          "-f fname_spec : is the filename convention to use, must have a\n",
          " \"%d\" within the string, as in \"sgi%05d.html\"\n",
          " which serves as the default is not provided.\n\n",
          "-o output_dir : location to write the output files to, default is the\n",
          " same directory the in_html_file is from.\n\n",
          "-v : operate in verbose mode.\n\n",
          "-noclean : do not remove the old fullbook.htm{l} file and it's\n",
          " associated TOC data files. They are removed by default.\n\n",
          "in_html_file : REQUIRED. Name of the fullbook.htm{l} file to \n",
          " process. Also denotes location of the TOC data\n",
          " files. Files created will be written to the same\n",
          " area, unless otherwise designated\n\n";
    return;
}

#
# void fail(string $message)
#
# Prints the usage text followed by "ERROR: $message" and exits with -1.
#
sub fail {
    my ($message) = @_;
    usage();
    print "ERROR: $message\n";
    exit(-1);
}

#
# void load_config(string $path)
#
# Loads a configuration file with require(); dies (as require does) when
# the file cannot be found or does not return a true value.
#
sub load_config {
    my ($path) = @_;

    # require() looks a bare relative name up through @INC, and "." is no
    # longer part of @INC on perl 5.26+.  When the file is visible from the
    # current directory, anchor it there explicitly; otherwise leave the
    # name alone so an @INC lookup still works.
    if ($path !~ m{^\.{0,2}/} && -f $path) {
        $path = "./$path";
    }
    require $path;
    return;
}

#
# string option_value()
#
# Consumes the option at the front of @ARGV together with its value and
# returns the value.  Exits through fail() when the value is missing.
#
sub option_value {
    my $opt = shift @ARGV;
    fail("Option $opt requires a value") unless @ARGV;
    return shift @ARGV;
}

#
# number to_number(string $value)
#
# Numeric value of $value using perl's usual string-to-number rules
# (undef and non-numeric text count as 0), without the warning.
#
sub to_number {
    my ($value) = @_;
    return 0 unless defined $value;
    no warnings 'numeric';
    return $value + 0;
}

#
# string text_or_empty(string $value)
#
# Maps undef to the empty string; used for optional configuration text.
#
sub text_or_empty {
    my ($value) = @_;
    return defined $value ? $value : '';
}

#
# (level, title, id) parse_toc_line(string $line)
#
# Splits one "level|title|id" record of a TOC data file.  Every field is
# trimmed; missing fields come back as empty strings.
#
sub parse_toc_line {
    my ($line) = @_;
    $line =~ s/\r?\n\z//;

    my @fields = split /\|/, $line, -1;
    foreach my $field (@fields) {
        $field =~ s/^\s+//;
        $field =~ s/\s+$//;
    }
    push @fields, '' while @fields < 3;
    return @fields[0 .. 2];
}

#
# int readToc(string $tocfile)
#
# Reads the main TOC data file and fills in:
#   $_ttl             book title (first level-0 record with a title)
#   @_break_elements  ids at which a new output file starts
#   @_break_titles    titles of those ids
#   %_element_map     id -> "file#id" for every record
#   $_indxExist       file holding the record titled "Index", if any
# Returns 1 on success, 0 when the file cannot be opened.
#
sub readToc {
    my ($tocfile) = @_;
    my $file_no    = 1;
    my $curr_fname = '';

    open(my $toc_fh, '<', $tocfile) or return 0;

    while (my $line = <$toc_fh>) {
        my ($level, $title, $id) = parse_toc_line($line);

        # this is the title line
        #
        if ($level eq '0' && $_ttl eq '') {
            $_ttl = $title;
            next;
        }

        # A record without an id (blank or damaged line) cannot be linked
        # to; skipping it also keeps it from starting a file that would
        # never be written.
        next if $id eq '';

        # break at these levels; configure filename to use
        #
        if (to_number($level) <= $_lvl_id) {
            # store only those that we are breaking on
            #
            push @_break_elements, $id;
            push @_break_titles,   $title;
            $curr_fname = sprintf($_fname_spec, $file_no++);
        }

        # use as quick lookup to determine what each id is mapped to
        #
        $_element_map{$id} = $curr_fname . ($id =~ /^#/ ? '' : '#') . $id;

        # back of the book index
        #
        if ($title =~ /^Index$/i) {
            $_indxExist = $curr_fname;
        }
    }
    close($toc_fh);

    return 1;
}

#
# int createContentFiles(string $mainHtml)
#
# Copies the master HTML file into one output file per break element.
# A new file starts at each "<!-- SGITOC:" marker whose SGI_nnn id is a
# break element; copying stops at "<!-- SGIEND:" or at end of input.
# Quoted references to known ids are rewritten to "file#id".
# Returns 1 on success; on failure returns 0 with $_last_error set.
#
sub createContentFiles {
    my ($mainHtml) = @_;

    # First position of each break id, with one optional leading '#'
    # removed so that both "SGI_n" and "#SGI_n" are found by the bare id.
    my %break_index = ();
    foreach my $i (0 .. scalar(@_break_elements) - 1) {
        (my $bare = $_break_elements[$i]) =~ s/^#//;
        $break_index{$bare} = $i unless exists $break_index{$bare};
    }

    # One precompiled pattern for all quoted ids.  Ids are escaped so that
    # characters such as '.' only match themselves.
    my $ref_re;
    if (%_element_map) {
        my $alternatives = join '|', map { quotemeta } sort keys %_element_map;
        $ref_re = qr/"($alternatives)"/;
    }

    my $in_fh;
    if (!open($in_fh, '<', $mainHtml)) {
        $_last_error = "Cannot read from $mainHtml";
        return 0;
    }

    my $doc_fh;
    my $curr_ptr   = -1;
    my $curr_fname = '';

    # Writes the footer to the file currently open (if any) and closes it.
    my $finish_doc = sub {
        return 1 unless defined $doc_fh;
        $_curr_title = $_break_titles[$curr_ptr];
        print {$doc_fh} std_hdrftr($curr_ptr, $STD_FTR);
        my $ok = close($doc_fh);
        $_last_error = "Cannot write to $curr_fname ($!)" unless $ok;
        undef $doc_fh;
        return $ok;
    };

    while (my $line = <$in_fh>) {
        last if $line =~ /^<!-- SGIEND:/;

        # found a marker, start of a section
        #
        if ($line =~ /^<!-- SGITOC:/ && $line =~ /\|(SGI_\d+)/) {
            my $idx = $break_index{$1};

            # see if we need to break and create a new file
            #
            if (defined $idx) {
                $finish_doc->() or return 0;

                $curr_ptr   = $idx;
                $curr_fname = $_output_dir . '/'
                            . $_element_map{ $_break_elements[$idx] };
                $curr_fname =~ s/#\w+$//;

                if ($_bVerbose == 1) {
                    print "splithtml: creating $curr_fname\n\n";
                }
                if (!open($doc_fh, '>', $curr_fname)) {
                    $_last_error = "Cannot write to $curr_fname ($!)";
                    undef $doc_fh;
                    return 0;
                }

                $_curr_title = $_break_titles[$curr_ptr];
                print {$doc_fh} std_hdrftr($curr_ptr, $STD_HDR);
            }
        }

        # adjust all element references; text ahead of the first break
        # belongs to no file and is dropped
        #
        if (defined $doc_fh) {
            $line =~ s/$ref_re/"$_element_map{$1}"/g if defined $ref_re;
            print {$doc_fh} $line;
        }
    }
    close($in_fh);

    # Also finishes the last file when the input has no SGIEND marker.
    $finish_doc->() or return 0;

    return 1;
}

#
# int createTocFiles()
#
# Writes one HTML table of contents per readable TOC data file in the
# input directory: default.dat becomes index.html (entries below
# $DEF_EXP_LEVEL) plus toc_full.html (all entries); figures.dat,
# tables.dat and examples.dat become <name>.html.  Entries whose target
# file was not produced are left out.
# Returns 1 on success; on failure returns 0 with $_last_error set.
#
# NOTE(review): the one-blank placeholders and separators written here
# (' ', "\n \n", "</A> \n") may have been HTML entities or tags before
# this copy of the script was extracted - confirm against the shipped file.
#
sub createTocFiles {
    my @tocs = grep { -r "$_input_dir/$_.dat" }
               ('default', 'figures', 'tables', 'examples');
    my $exp_level = to_number($DEF_EXP_LEVEL);

    foreach my $tt (@tocs) {
        my $is_main  = ($tt eq 'default');
        my $in_path  = "$_input_dir/$tt.dat";
        my $out_path = "$_output_dir/" . ($is_main ? 'index' : $tt) . '.html';
        my $exp_path = "$_output_dir/toc_full.html";
        my ($in_fh, $out_fh, $exp_fh);

        if (!open($in_fh, '<', $in_path)) {
            $_last_error = "cannot read $in_path ($!)";
            return 0;
        }
        if (!open($out_fh, '>', $out_path)) {
            $_last_error = "cannot write $out_path ($!)";
            return 0;
        }
        if ($_bVerbose == 1) {
            print "splithtml: creating $out_path\n\n";
        }
        if ($is_main) {
            # setup an expanded toc file
            #
            if (!open($exp_fh, '>', $exp_path)) {
                $_last_error = "cannot write $exp_path ($!)";
                return 0;
            }
        }

        # ---- header: fill in the %%L... navigation placeholders ----
        $_curr_title = text_or_empty($TOC_TITLES{$tt});
        my $hdr = std_hdrftr(1, $STD_TOC_HDR);

        my $link = ($_bHelpExist == 1)
                 ? '<A HREF="help.html">' . text_or_empty($TOC_TITLES{'help'}) . '</A>'
                 : ' ';
        $hdr =~ s/%%LHELP/$link/g;

        $link = ($_indxExist ne '')
              ? "<A HREF=\"$_indxExist\">" . text_or_empty($TOC_TITLES{'indx'}) . '</A>'
              : ' ';
        $hdr =~ s/%%LINDX/$link/g;

        # every TOC links to its siblings; the current one is plain text
        foreach my $other (@tocs) {
            my $label = text_or_empty($TOC_TITLES{$other});
            $link = ($other ne $tt)
                  ? '<A HREF="' . ($other eq 'default' ? 'index' : $other)
                    . '.html">' . $label . '</A>'
                  : $label;
            my $pat = '%%L' . uc($other);
            $hdr =~ s/$pat/$link/g;
        }

        if ($is_main) {
            $hdr =~ s/%%LBORDER/1/;
            my $exp_hdr  = $hdr;
            my $expand   = text_or_empty($TOC_TITLES{'expand'});
            my $collapse = text_or_empty($TOC_TITLES{'collapse'});

            # collapsed page: "expand" is a link, "collapse" is plain text
            $link = '<A HREF="toc_full.html">' . $expand . '</A>';
            $hdr =~ s/%%LEXPAND/$link/;
            $hdr =~ s/%%LCOLLAPSE/$collapse/;

            # expanded page: the other way round
            $link = '<A HREF="index.html">' . $collapse . '</A>';
            $exp_hdr =~ s/%%LCOLLAPSE/$link/;
            $exp_hdr =~ s/%%LEXPAND/$expand/;

            $exp_hdr =~ s/%%L\w+//g;
            print {$exp_fh} $exp_hdr;
        }
        else {
            $hdr =~ s/%%LBORDER/0/;
        }
        $hdr =~ s/%%L\w+//g;    # drop placeholders nothing filled in
        print {$out_fh} $hdr;

        # ---- body: one link per TOC record ----
        while (my $line = <$in_fh>) {
            my ($level, $title, $id) = parse_toc_line($line);
            next if $level eq '0' || $id eq '';

            # see if file actually exists, if not, do not print this item
            #
            my $href = $_element_map{$id};
            next unless defined $href && $href ne '';
            (my $file = $href) =~ s/#\w+$//;
            next if $file eq '';
            next unless -f "$_output_dir/$file" && -r _;

            my $depth = to_number($level);
            my $entry = (' ' x $depth) . "<A HREF=\"$href\">$title</A> \n";

            if ($is_main) {
                if ($depth == 1) {
                    print {$out_fh} "\n \n";
                    print {$exp_fh} "\n \n";
                }
                print {$out_fh} $entry if $depth < $exp_level;
                print {$exp_fh} $entry;
            }
            else {
                print {$out_fh} $entry;
            }
        }
        close($in_fh);

        # ---- footer ----
        my $ftr = std_hdrftr(1, $STD_TOC_FTR);
        print {$out_fh} $ftr;
        if (!close($out_fh)) {
            $_last_error = "cannot write $out_path ($!)";
            return 0;
        }
        if ($is_main) {
            print {$exp_fh} $ftr;
            if (!close($exp_fh)) {
                $_last_error = "cannot write $exp_path ($!)";
                return 0;
            }
        }
    }

    return 1;
}

#
# void createHelpTopics()
#
# pick up any helpid's and create a simple structure
#
# Scans the generated content files (names matching the file name spec,
# in the output directory) for lines containing "SGI_HELPID:" and writes
# help.html with one list item per such line.  Sets $_bHelpExist when at
# least one topic was found; otherwise makes sure no help.html is left.
#
sub createHelpTopics {
    my $help_path = "$_output_dir/help.html";

    # Same files the spec would select with each %d field replaced by a
    # wildcard; literal parts are escaped.
    my $name_re = join '.*', map { quotemeta } split /%\d*d/, $_fname_spec, -1;

    opendir(my $dh, $_output_dir) or return;
    my @names = sort grep { !/^\./ && /^$name_re$/ } readdir($dh);
    closedir($dh);

    # Collect the topics first: the file name is known directly (no need
    # to parse it back out of "file:line" output) and help.html is only
    # written once reading is finished.
    my @items = ();
    foreach my $name (@names) {
        my $path = "$_output_dir/$name";
        next unless -f $path;
        open(my $fh, '<', $path) or next;
        while (my $line = <$fh>) {
            next if index($line, 'SGI_HELPID:') < 0;
            chomp $line;

            my $id    = ($line =~ / NAME="([ !#-~]+)"/) ? $1 : '';
            my $title = '';
            if ($line =~ m{/A>([ -~]+)}) {
                $title = $1;
                $title =~ s/<\w{1,4}>//g;      # strip short opening tags
                $title =~ s{</\w{1,4}>}{}g;    # ... and closing tags
            }
            push @items, "<LI><A href=\"$name#$id\">$title</A></LI>\n";
        }
        close($fh);
    }

    # no help topics found
    #
    if (!@items) {
        unlink($help_path);
        return;
    }

    open(my $out_fh, '>', $help_path) or return;
    if ($_bVerbose == 1) {
        print "splithtml: creating help topics file ($help_path)\n\n";
    }

    my $help_title = text_or_empty($TOC_TITLES{'help'});
    my $fill = sub {
        my ($text) = @_;
        $text =~ s/%%BK_TITLE/$_ttl/g;
        $text =~ s/%%CURR_TITLE/$help_title/g;
        $text =~ s/%%PREV_URL/ /g;
        $text =~ s/%%NEXT_URL/ /g;
        return $text;
    };

    print {$out_fh} $fill->(text_or_empty($STD_HDR)) . "\n\n<UL>\n";
    print {$out_fh} @items;

    # one newline per topic goes between the list and the footer
    my $ht_set = "\n" x scalar(@items);
    print {$out_fh} $fill->("\n</UL>\n\n\n" . $ht_set . "\n\n"
                            . text_or_empty($STD_FTR));
    close($out_fh);

    $_bHelpExist = 1;
    return;
}

#
# header and footer methods
#
# string std_hdrftr(int $s, string $in_buf)
#
# Returns a copy of template $in_buf with %%BK_TITLE, %%CURR_TITLE,
# %%PREV_URL and %%NEXT_URL filled in.  $s is a position in
# @_break_elements; the previous/next links point at its neighbours and
# are a single blank where there is no neighbour.
#
sub std_hdrftr {
    my ($s, $in_buf) = @_;
    my $id   = to_number($s);
    my $buf  = text_or_empty($in_buf);
    my $curr = text_or_empty($_curr_title);

    $buf =~ s/%%BK_TITLE/$_ttl/g;
    $buf =~ s/%%CURR_TITLE/$curr/g;

    my $prev = nav_link($id - 1);
    $buf =~ s/%%PREV_URL/$prev/g;

    my $next = nav_link($id + 1);
    $buf =~ s/%%NEXT_URL/$next/g;

    return $buf;
}

#
# string nav_link(int $idx)
#
# Anchor pointing at the file of break element $idx, labelled with its
# title; a single blank when $idx is outside @_break_elements.
#
sub nav_link {
    my ($idx) = @_;
    return ' ' if $idx < 0 || $idx >= scalar(@_break_elements);

    my $href = text_or_empty($_element_map{ $_break_elements[$idx] });
    $href =~ s/#\w+$//;
    return "<A HREF=\"$href\">" . text_or_empty($_break_titles[$idx]) . '</A>';
}